#!/usr/bin/env python3
"""
Aggregate JSON simulation summaries into CSV/Parquet.

Usage (new style):
    python scripts/aggregate.py runs_wide --output_prefix results\\summary

Usage (legacy style, still supported):
    python scripts/aggregate.py --runs-dir runs_wide --out results\\summary.csv
"""

import argparse
import json
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional

import pandas as pd

def flatten_dict(d: Dict[str, Any], parent_key: str = "", sep: str = ".") -> Dict[str, Any]:
    """Collapse a nested dict into a single-level dict with joined keys.

    Nested dict values are expanded recursively; each leaf ends up under a
    key made of the keys on its path joined by ``sep``. Values that are not
    dicts (including lists) are stored as-is. A nested dict with no entries
    contributes no keys to the result.

    Args:
        d: Mapping to flatten.
        parent_key: Key path prepended to every key of ``d``; when empty,
            the keys of ``d`` are used unchanged.
        sep: Separator placed between path components.

    Returns:
        A new flat dict, ordered depth-first in the insertion order of ``d``.
    """
    flat: Dict[str, Any] = {}
    prefix = f"{parent_key}{sep}" if parent_key else ""
    for name, value in d.items():
        # Top-level keys are kept exactly as they are (no formatting applied).
        path = f"{prefix}{name}" if parent_key else name
        if not isinstance(value, dict):
            flat[path] = value
            continue
        flat.update(flatten_dict(value, path, sep=sep))
    return flat

def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the command line for both the new-style and legacy interfaces.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse reads ``sys.argv[1:]``, so
            existing ``parse_args()`` callers are unaffected.

    Returns:
        Namespace with ``input_dir`` and ``output_prefix`` (new style) and
        ``runs_dir`` and ``out`` (legacy aliases). Options that were not
        supplied are ``None``, except ``output_prefix`` which defaults to
        ``"summary"``.
    """
    parser = argparse.ArgumentParser(description="Aggregate JSON summaries into CSV/Parquet.")
    # New-style
    parser.add_argument("input_dir", nargs="?", help="Directory of JSON files (new style)")
    parser.add_argument(
        "--output_prefix",
        default="summary",
        help="Output prefix (e.g., results\\summary)",
    )
    # Legacy aliases
    parser.add_argument("--runs-dir", dest="runs_dir", help="(legacy) directory of JSON files")
    parser.add_argument(
        "--out",
        dest="out",
        help="(legacy) full CSV path, e.g. results\\summary.csv",
    )
    return parser.parse_args(argv)

def _with_extension(prefix: Path, ext: str) -> Path:
    """Return ``prefix`` with ``ext`` as its extension, keeping dots in the stem.

    ``Path.with_suffix`` replaces whatever follows the last dot, so a prefix
    such as ``run_v1.5`` would become ``run_v1.csv``. The extension is
    therefore appended, unless the prefix already ends in ``.csv`` or
    ``.parquet``, in which case that extension is swapped for ``ext``.
    """
    if prefix.suffix.lower() in (".csv", ".parquet"):
        return prefix.with_suffix(ext)
    return prefix.with_name(prefix.name + ext)


def main() -> int:
    """Aggregate every ``*.json`` file of the input directory into one table.

    Each JSON file must hold an object; it is flattened into one row. The
    rows are written as CSV and, when possible, as Parquet next to it.
    Diagnostics (errors and per-file warnings) go to stderr; the final
    "Wrote ..." line goes to stdout.

    Returns:
        ``0`` when the CSV was written (even if Parquet was skipped),
        ``1`` when nothing could be written (no input directory given,
        unusable output prefix, no JSON files, or no parsable rows).
    """
    args = parse_args()

    # Resolve inputs/outputs from either style
    input_dir = args.input_dir or args.runs_dir
    if not input_dir:
        print(
            "ERROR: No input directory provided. Use either `runs_wide` positional or `--runs-dir`.",
            file=sys.stderr,
        )
        return 1
    input_dir = Path(input_dir)

    if args.out:  # legacy explicit CSV path
        csv_path = Path(args.out)
        parquet_path = csv_path.with_suffix(".parquet")
    else:
        prefix = Path(args.output_prefix)
        if not prefix.name:
            # e.g. "" or "." -- there is no file name to attach an extension to.
            print(
                f"ERROR: --output_prefix {args.output_prefix!r} has no file name component.",
                file=sys.stderr,
            )
            return 1
        csv_path = _with_extension(prefix, ".csv")
        parquet_path = _with_extension(prefix, ".parquet")

    # Gather JSONs (sorted so the row order is reproducible)
    json_files = sorted(input_dir.glob("*.json"))
    if not json_files:
        print(f"No JSON files found in {input_dir}", file=sys.stderr)
        return 1

    rows: List[Dict[str, Any]] = []
    for jf in json_files:
        try:
            # utf-8-sig reads plain UTF-8 and also strips a leading BOM,
            # which json.load would otherwise reject.
            with open(jf, "r", encoding="utf-8-sig") as f:
                data = json.load(f)
            if not isinstance(data, dict):
                raise ValueError(
                    f"top-level JSON value is a {type(data).__name__}, expected an object"
                )
            rows.append(flatten_dict(data))
        except (OSError, ValueError, RecursionError) as e:
            # Best effort: one unreadable/malformed file must not abort the run.
            # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
            print(f"WARNING: failed to parse {jf}: {e}", file=sys.stderr)

    if not rows:
        print(f"No valid JSON rows parsed from {input_dir}", file=sys.stderr)
        return 1

    df = pd.DataFrame(rows)

    # Ensure parent directory exists (both outputs share the same parent)
    csv_path.parent.mkdir(parents=True, exist_ok=True)

    df.to_csv(csv_path, index=False)
    try:
        df.to_parquet(parquet_path, index=False)
    except ImportError:
        # pandas raises ImportError when no Parquet engine is available.
        print(f"Wrote {csv_path}; Parquet skipped (pyarrow/fastparquet not installed)")
    except Exception as e:
        # Parquet is optional, but report the real cause (e.g. a column with
        # mixed types) instead of blaming a missing engine.
        print(f"Wrote {csv_path}; Parquet skipped ({type(e).__name__}: {e})")
    else:
        print(f"Wrote {csv_path} and {parquet_path}")
    return 0

if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit
    # status; a None return is reported as success (status 0).
    raise SystemExit(main())
